/******************************************************************************
Safer in School? The Impact of Compulsory Schooling on Maltreatment and Associated Harms
by Adam A. Dzulkipli, Nicole Black, David W. Johnston, and Leonie Segal

Description: This program recreates additional results/analyses which are discussed
in the main text of the paper, but not formally presented in any table or figure.

Note that directory paths have been removed to preserve privacy. Please insert 
the relevant directories in the global macros under "Setup" before running the
.do file. This code was written and implemented in Stata MP 19.5, with
a computational time of approximately ~4 hours.

Packages required include:
•	ftools
•	reghdfe
•	estout
•	rdrobust
•	pzms
•	rddisttestk (.ado file by Brigham Frandsen available from
	https://sites.google.com/view/brighamfrandsen/software?authuser=0)
•	rddensity
•	rdhonest

*****************************************************************************/

********************************************************************************
*	Setup
********************************************************************************
clear all
set more off
macro drop all
capture log close

global raw 
global tmp 
global clean 
global output 

********************************************************************************
*	Effect of the reform on CPS notifications, for children previously reported by teachers
********************************************************************************

use "${clean}analysis_final.dta", clear

* Construct treatment/reform indicator for children previously reported by school notifiers
gen schcps_post1 = (cps_age16_sch == 1 & h2 >= 0)

* Estimate RD treatment effects, including the above indicator and interaction terms
local h = 721

reghdfe cps_after16_binary nocps_post1 anycps_post1 schcps_post1 ///
	cps_age16_binary cps_age16_sch ///
	h2 c.h2#post c.h2#cps_age16_binary c.h2#post#cps_age16_binary ///
	c.h2#cps_age16_sch c.h2#post1#cps_age16_sch ///
	if abs(h2) <= `h', vce(robust) absorb(monthofbirth#cps_age16_binary)

* Cumulative effect of the reform on children previously reported by school notifiers
local beta = _b[anycps_post1] + _b[schcps_post1]
qui sum cps_after16_binary if post1 == 0 & cps_age16_binary == 1
local mean_anycp = r(mean)

di `beta'/`mean_anycp'
test anycps_post1 + schcps_post1 = 0

********************************************************************************
*	Effect of the reform on CPS notifications by school vs. non-school notifiers
********************************************************************************

use "${clean}analysis_final.dta", clear

*** Estimate effect of the reform on CPS notifications by reporter type
local h = 721

* CPS notifications by school-based notifiers
reghdfe cps_after16_sch nocps_post1 anycps_post1 cps_age16_binary ///
	h2 c.h2#post c.h2#cps_age16_binary c.h2#post#cps_age16_binary ///
	if abs(h2) <= `h', vce(robust) absorb(monthofbirth#cps_age16_binary)
	
local beta = _b[nocps_post1]
qui sum cps_after16_sch if post1 == 0 & cps_age16_binary == 0
local mean_anycp = r(mean)

di `beta'
di `beta'/`mean_anycp'
test `beta' = 0

* CPS notifications by all other notifiers (i.e. non-school)
reghdfe cps_after16_nonschool nocps_post1 anycps_post1 cps_age16_binary ///
	h2 c.h2#post c.h2#cps_age16_binary c.h2#post#cps_age16_binary ///
	if abs(h2) <= `h', vce(robust) absorb(monthofbirth#cps_age16_binary)
	
local beta = _b[nocps_post1]
qui sum cps_after16_nonschool if post1 == 0 & cps_age16_binary == 0
local mean_anycp = r(mean)

di `beta'
di `beta'/`mean_anycp'
test `beta' = 0

********************************************************************************
*	Cost-savings from first-time cases of maltreatment averted
********************************************************************************

use "${clean}analysis_final.dta", clear

* Number of children born post cut-off (without CPS involvement)
count if cps_age16_binary == 0 & post1 == 1

// 61,474 children over 48 months (i.e. 4 years) after cut-off
// 15,369 children annually

* Estimated number of children saved from a first-time maltreatment incident
di 0.006 * 15369 // 92 children annually

/*
  Use costs from McCarthy et al. (2016) of first-time incident cases of maltreatment
  ($505,194 per child in 2014-15 AUD) to calculate overall cost-savings.
*/
di 92 * 505194 // $46,477,848 annually
 
*******************************************************************************
*	Cost-savings from ED visits (extensive margin) averted among CPS children
*******************************************************************************

use "${clean}analysis_final.dta", clear

* Number of children born post cut-off (with past CPS involvement)
count if cps_age16_binary == 1 & post1 == 1

// 16,075 children over 48 months (i.e. 4 years) after cut-off
// 4,019 children annually

* Estimated number of children not ending up in ED
di 0.039 * 4019 // 157 children annually

/*
  Use average cost of ED visit from NHCDC in SA ($605 per ED presentation in 2014-15)
  to calculate overall cost-savings to the healthcare system.
  (https://www.ihacpa.gov.au/health-care/costing/national-hospital-cost-data-collection/national-hospital-cost-data-collection-public-sector)
*/
di 157 * 605 // $94,985 in ED costs averted annually

*******************************************************************************
*	Estimated overnment expenditure associated with the RoSLA reform
*******************************************************************************

use "${clean}analysis_final.dta", clear

* Number of children born with public school records post cut-off (by CPS involvement)
tab cps_age16_binary if post1 == 1 & rec_census15 == 1

// 29,743 non-CPS children over 48 months (i.e. 4 years) after cut-off
// 7,436 non-CPS children annually

// 12,148 CPS children over 48 months (i.e. 4 years) after cut-off
// 3,037 CPS children annually

* Calculate number of aditional children enrolled in school annually
di (0.033 * 7436) + (0.055 * 3037) // 412 additional children enrolled each year

/*
  Use government recurrent funding for school education from ACARA ($14,286 per
  student in 2014-15) to estimate the additional government funding related to the reform
  https://www.acara.edu.au/reporting/national-report-on-schooling-in-australia/school-income
*/
di 412 * 14286 // $5,885,832 in government funding

********************************************************************************
*	Bandwidth selection using Calonico, Cattaneo, and Titiunik (2014) (CCT)
********************************************************************************

use "${clean}analysis_final.dta", clear

/*
  Use 'rdbwselect' to calculate MSE-optimal bandwidth of CCT, averaged across the
  main outcomes and restricted to the subgroup of children with past CPS involvement.
  Note that we omit the regularisation term (i.e. scaleregul(0)) following advice
  in Card et al. (2015) and Card et al. (2017) that this may yield bandwidths
  which are inappropriate in certain empirical settings.
*/
foreach y in enrol16 enrol17 cps_after16_binary evered16 evered_17to21{
	capture drop bw_`y'
	rdbwselect `y' h2 if cps_age16_binary == 1, p(1) kernel(uni) scaleregul(0)
	gen bw_`y' = round(e(h_mserd))
}

gen bw_cct = 0
foreach y in enrol16 enrol17 cps_after16_binary evered16 evered_17to21{
	replace bw_cct = bw_cct + bw_`y'
}
di bw_cct/5

********************************************************************************
*	Bandwidth selection using Kettlewell and Siminski (2022)
	/*
	  Notes: We consider bandwidths ranging from 400 days to 900 days in the
	  optimal bandwidth calculation. This results in a placebo zone of 212 placebo
	  thresholds to test the RD specification and calculate the MSE of the model.
	  Defining a lower minimum bandwidth will reduce the available number of
	  placebo thresholds for the optimal bandwidth selection process.
	*/
********************************************************************************

use "${clean}analysis_final.dta", clear

* Restrict sample to children with past CPS involvement
keep if cps_age16_binary == 1

* Calculate optimal bandwidth for CPS outcome
local var enrol16 enrol17 cps_after16_binary evered16 evered_17to21

foreach y in `var'{
	pzms `y' h2, minbw(400) maxbw(900) pzstepsize(1) bwstepsize(1) covs(mb*) vce(hc3)

	gen bw_`y' = e(pz_winbw)
	gen est_`y' = e(pz_est)
	gen se_`y' = e(pz_se)
	gen pval_`y' = e(pz_p)
	gen rand_`y' = e(pz_altp)
}

gen bw_ks = 0
foreach y in enrol16 enrol17 cps_after16_binary evered16 evered_17to21{
	replace bw_ks = bw_ks + bw_`y'
}
di bw_ks/5

* Calculate optimal bandwidth for ED outcomes
local var evered16 evered_17to21

foreach y in `var'{
	pzms `y' h2, minbw(400) maxbw(900) pzstepsize(1) bwstepsize(1) covs(mb*) vce(hc3)
	
	gen bw_`y' = e(pz_winbw)
	gen est_`y' = e(pz_est)
	gen se_`y' = e(pz_se)
	gen pval_`y' = e(pz_p)
	gen rand_`y' = e(pz_altp)
}

* Calculate optimal bandwidth for schooling outcomes (restrict to public school sample)
keep if rec_census15 == 1

local var enrol16 enrol17

foreach y in `var'{
	pzms `y' h2, minbw(400) maxbw(900) pzstepsize(1) bwstepsize(1) covs(mb*) vce(hc3)
	
	gen bw_`y' = e(pz_winbw)
	gen est_`y' = e(pz_est)
	gen se_`y' = e(pz_se)
	gen pval_`y' = e(pz_p)
	gen rand_`y' = e(pz_altp)
}

********************************************************************************
*	Testing for manipulation of the running variable
********************************************************************************

use "${clean}analysis_final.dta", clear

* Perform density test of Cattaneo et al. (2020), using a uniform kernel
rddensity h2, h(721) p(1) kernel(uniform)
di `e(pv_q)'

* Perform density test of Cattaneo et al. (2020), using a triangular kernel
rddensity h2, h(721) p(1) kernel(triangular)
di `e(pv_q)'

/*
  Perform density test of Frandsen (2017), which accounts for the discrete nature
  of the running variable. Set k = 0 for the most conservative density test.
  Note that to run this test, the user first needs to install the 'rddisttestk.ado'
  file provided by Brigham Frandsen on the following website:
  https://sites.google.com/view/brighamfrandsen/software?authuser=0
*/
capture rddisttestk h2, threshold(0) k(0)
di `r(p)'

/********************************************************************************
References:

Calonico, Sebastian, Matias D. Cattaneo, and Rocio Titiunik, "Robust Nonparametric
	Confidence Intervals for Regression-Discontinuity Designs," Econometrica,
	2014, 82 (6), 2295–2326.
Card, D., Lee, D. S., Pei, Z., & Weber, A. (2015). Inference on causal effects in a
	generalized regression kink design. Econometrica, 83 , 2453{2483.
Card, D., Lee, D. S., Pei, Z., & Weber, A. (2017). Regression kink design: Theory
	and practice. In Regression Discontinuity Designs (Advances in Econometrics,
	Vol. 38) (pp. 341{382). Emerald Publishing Limited.
Cattaneo, Matias D., Michael Jansson, and Xinwei Ma, "Simple Local Polynomial
	Density Estimators," Journal of the American Statistical Association, 2020, 115 (531),
	1449–1455.
Frandsen, Brigham R, "Party Bias in Union Representation Elections: Testing for
	Manipulation in the Regression Discontinuity Design when the Running Variable is
	Discrete," in "Regression Discontinuity Designs: Theory and Applications," Emerald
	Publishing Limited, 2017, pp. 281–315.
Kettlewell, Nathan and Peter Siminski, "Optimal Model Selection in RDD and Related
	Settings Using Placebo Zones," Working Paper, University of Technology Sydney
	2022.
********************************************************************************/


